# library(rbiom)
# library(kableExtra)
# library(ggplot2)
# library(ggpubr)
# library(ggforce)
# library(patchwork)
# library(vegan)
# library(dplyr)
# library(forcats)
# library(RColorBrewer)
# library(randomForest)
# library(caret)
The file handles are set in config.R as they’re used by both this script and data_cleaning.
# Load the shared helper functions and the project configuration.
# NOTE(review): these are absolute, machine-specific paths - confirm they
# resolve on the machine running this script.
source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/00.core_functions.R")
source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/config.R")

metadata <- read_metadata(metadata_handle)

# The relabelling of Group is done here so that the output files of maaslin
# get named according to the variable names in the metadata file.
# The three recodes run in order; each one sees the result of the previous one.
metadata <- metadata %>%
  mutate(
    Group = if_else(Group == "Control_HealthySerosurvey", "Household contact", Group),
    Group = if_else(Group == "Acute_Typhi", "Acute typhoid", Group),
    Group = if_else(Group == "Carrier", "High Vi-titre", Group)
  )
# Read the combined MetaPhlAn table. The first column is used as row names and
# check.names = FALSE keeps the remaining column headers exactly as written.
strataa_metaphlan_data <- read.csv(
  file = file.path(metaphlan_input_folder, '2023.05.11.all_strataa_metaphlan.csv'),
  header = TRUE,
  sep = ",",
  row.names = 1,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# Row names are "|"-separated strings; keep the last element of each one.
# vapply() (rather than sapply()) guarantees a character vector is returned,
# including when the table has no rows.
strataa_metaphlan_data$lowest_taxonomic_level <- vapply(
  str_split(row.names(strataa_metaphlan_data), "\\|"),
  function(x) x[length(x)],
  character(1)
)

# Keep only the rows whose lowest taxonomic level label starts with "s__".
strataa_metaphlan_data_species <- strataa_metaphlan_data %>%
  filter(str_starts(lowest_taxonomic_level, 's__'))
# metadata <- read.csv(file = file.path(metaphlan_input_folder, '2023.05.11.strataa_metadata.metaphlan.csv'), header = TRUE, sep = ",", row.names = 1, stringsAsFactors = FALSE)
Alpha diversity - all countries, healthy and acute
# Alpha diversity for the 'Acute typhoid' and 'Household contact' groups in
# Bangladesh, Malawi and Nepal, comparing the two groups against each other.
all_countries_healthy_acute_alpha <- metaphlan_alpha(
  strataa_metaphlan_data_species,
  metadata,
  countries_of_interest = c('Bangladesh', 'Malawi', 'Nepal'),
  groups_of_interest = c('Acute typhoid', 'Household contact'),
  comparisons = list(c('Acute typhoid', 'Household contact')),
  participant_group_colours = participant_group_colours
)
# all_countries_healthy_acute_alpha$alpha_by_country

# Show the ANOVA summary as a styled table, with every numeric column rounded
# to 3 significant figures and converted to text.
# across() replaces mutate_if(..., funs(...)); funs() is deprecated in dplyr.
all_countries_healthy_acute_alpha$alpha_anova_summary_with_var_names %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| rownames(alpha_anova_summary[[1]]) | Df | Sum.Sq | Mean.Sq | F.value | Pr..F. | is_it_significant |
|---|---|---|---|---|---|---|
| Country | 2 | 16.3 | 8.17 | 18 | 9.26e-08 | significant |
| Country:Age:Antibiotics_taken_before_sampling_assumptions | 2 | 5.17 | 2.58 | 5.7 | 0.0041 | significant |
| Country:Age | 2 | 3.32 | 1.66 | 3.66 | 0.0279 | not_significant |
| Sex:Age | 1 | 1.99 | 1.99 | 4.4 | 0.0376 | not_significant |
| Age:Antibiotics_taken_before_sampling_assumptions | 1 | 1.51 | 1.51 | 3.32 | 0.0703 | not_significant |
| Country:Sex | 2 | 2.18 | 1.09 | 2.4 | 0.0941 | not_significant |
| Sex:Group | 1 | 1.12 | 1.12 | 2.46 | 0.119 | not_significant |
| Country:Antibiotics_taken_before_sampling_assumptions | 2 | 1.53 | 0.765 | 1.69 | 0.188 | not_significant |
| Country:Group | 2 | 1.24 | 0.622 | 1.37 | 0.257 | not_significant |
| Country:Sex:Antibiotics_taken_before_sampling_assumptions | 2 | 1.23 | 0.616 | 1.36 | 0.26 | not_significant |
| Group | 1 | 0.567 | 0.567 | 1.25 | 0.265 | not_significant |
| Sex:Antibiotics_taken_before_sampling_assumptions | 1 | 0.555 | 0.555 | 1.22 | 0.27 | not_significant |
| Country:Sex:Group | 2 | 0.759 | 0.379 | 0.837 | 0.435 | not_significant |
| Age | 1 | 0.227 | 0.227 | 0.502 | 0.48 | not_significant |
| Country:Group:Age | 2 | 0.627 | 0.313 | 0.692 | 0.502 | not_significant |
| Antibiotics_taken_before_sampling_assumptions | 1 | 0.195 | 0.195 | 0.43 | 0.513 | not_significant |
| Country:Sex:Age | 2 | 0.43 | 0.215 | 0.475 | 0.623 | not_significant |
| Sex:Age:Antibiotics_taken_before_sampling_assumptions | 1 | 0.0406 | 0.0406 | 0.0897 | 0.765 | not_significant |
| Sex | 1 | 0.0334 | 0.0334 | 0.0736 | 0.786 | not_significant |
| Country:Sex:Group:Age | 2 | 0.206 | 0.103 | 0.228 | 0.797 | not_significant |
| Sex:Group:Age | 1 | 0.027 | 0.027 | 0.0595 | 0.808 | not_significant |
| Group:Age | 1 | 0.0137 | 0.0137 | 0.0302 | 0.862 | not_significant |
| Country:Sex:Age:Antibiotics_taken_before_sampling_assumptions | 2 | 0.0969 | 0.0485 | 0.107 | 0.899 | not_significant |
| Residuals | 153 | 69.3 | 0.453 | NA | NA | NA |
# Print the `alpha_plot_group` element of the metaphlan_alpha() result
# (presumably the alpha-diversity plot split by participant group).
all_countries_healthy_acute_alpha$alpha_plot_group
# all_countries_healthy_acute_alpha$alpha_plot_antibiotics
Beta diversity: acute vs healthy.
# Beta-diversity analysis of 'Acute typhoid' vs 'Household contact' with
# Bangladesh, Malawi and Nepal passed in a single call.
all_countries_beta_acute_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Bangladesh", "Malawi", "Nepal"),
  c("Acute typhoid", "Household contact"),
  participant_group_colours
)
# Show the `pn_res` element of the result as a styled table.
all_countries_beta_acute_healthy$pn_res %>%
  kbl() %>%
  kable_styling()
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.0404140 | 0.0009990 | significant |
| Group:Age | 0.0096324 | 0.0109890 | not_significant |
| Age | 0.0084032 | 0.0479520 | not_significant |
| Antibiotics_taken_before_sampling_assumptions | 0.0074021 | 0.0779221 | not_significant |
| Sex:Group:Age | 0.0072938 | 0.0809191 | not_significant |
| Sex:Age | 0.0069815 | 0.1088911 | not_significant |
| Sex:Age:Antibiotics_taken_before_sampling_assumptions | 0.0062206 | 0.1558442 | not_significant |
| Sex | 0.0061693 | 0.1778222 | not_significant |
| Sex:Antibiotics_taken_before_sampling_assumptions | 0.0060463 | 0.2097902 | not_significant |
| Sex:Group | 0.0053365 | 0.3476523 | not_significant |
| Age:Antibiotics_taken_before_sampling_assumptions | 0.0049480 | 0.4355644 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.8911522 | NA | NA |
# Same beta-diversity analysis, restricted to Bangladesh.
bgd_beta_acute_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Bangladesh"),
  c("Acute typhoid", "Household contact"),
  participant_group_colours
)
# Show the `pn_res` element of the result as a styled table.
bgd_beta_acute_healthy$pn_res %>%
  kbl() %>%
  kable_styling()
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.0741793 | 0.0009990 | significant |
| Age | 0.0179478 | 0.0489510 | not_significant |
| Sex | 0.0169513 | 0.0539461 | not_significant |
| Sex:Age | 0.0144580 | 0.1828172 | not_significant |
| Antibiotics_taken_before_sampling_assumptions | 0.0140324 | 0.2317682 | not_significant |
| Sex:Group:Age | 0.0126416 | 0.3866134 | not_significant |
| Group:Age | 0.0120334 | 0.4005994 | not_significant |
| Age:Antibiotics_taken_before_sampling_assumptions | 0.0097011 | 0.7182817 | not_significant |
| Sex:Antibiotics_taken_before_sampling_assumptions | 0.0083380 | 0.8921079 | not_significant |
| Sex:Group | 0.0079615 | 0.9210789 | not_significant |
| Sex:Age:Antibiotics_taken_before_sampling_assumptions | 0.0061804 | 0.9760240 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.8055752 | NA | NA |
# Same beta-diversity analysis, restricted to Malawi.
mal_beta_acute_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Malawi"),
  c("Acute typhoid", "Household contact"),
  participant_group_colours
)
# Show the `pn_res` element of the result as a styled table.
mal_beta_acute_healthy$pn_res %>%
  kbl() %>%
  kable_styling()
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.2814220 | 0.0009990 | significant |
| Age:Antibiotics_taken_before_sampling_assumptions | 0.0264834 | 0.0039960 | significant |
| Sex:Antibiotics_taken_before_sampling_assumptions | 0.0233976 | 0.0289710 | not_significant |
| Antibiotics_taken_before_sampling_assumptions | 0.0216165 | 0.0379620 | not_significant |
| Sex:Group | 0.0214513 | 0.0399600 | not_significant |
| Sex:Age:Antibiotics_taken_before_sampling_assumptions | 0.0206438 | 0.0579421 | not_significant |
| Sex:Age | 0.0161503 | 0.1168831 | not_significant |
| Sex | 0.0149099 | 0.1378621 | not_significant |
| Age | 0.0154778 | 0.1428571 | not_significant |
| Sex:Group:Age | 0.0136252 | 0.2397602 | not_significant |
| Group:Age | 0.0107163 | 0.4055944 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.5341059 | NA | NA |
# Same beta-diversity analysis, restricted to Nepal.
nep_beta_acute_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Nepal"),
  c("Acute typhoid", "Household contact"),
  participant_group_colours
)
# Show the `pn_res` element of the result as a styled table.
nep_beta_acute_healthy$pn_res %>%
  kbl() %>%
  kable_styling()
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.0578851 | 0.0009990 | significant |
| Sex | 0.0424544 | 0.0029970 | significant |
| Sex:Group | 0.0269210 | 0.1238761 | not_significant |
| Sex:Antibiotics_taken_before_sampling_assumptions | 0.0256575 | 0.1578422 | not_significant |
| Age | 0.0231087 | 0.3046953 | not_significant |
| Sex:Age:Antibiotics_taken_before_sampling_assumptions | 0.0212328 | 0.4855145 | not_significant |
| Sex:Age | 0.0200241 | 0.4885115 | not_significant |
| Sex:Group:Age | 0.0201543 | 0.5344655 | not_significant |
| Group:Age | 0.0170470 | 0.7722278 | not_significant |
| Antibiotics_taken_before_sampling_assumptions | 0.0167425 | 0.7962038 | not_significant |
| Age:Antibiotics_taken_before_sampling_assumptions | 0.0152914 | 0.8781219 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.7134813 | NA | NA |
# Combine the `pc12` plots from the four beta-diversity runs (all countries,
# Bangladesh, Malawi, Nepal) into one figure, collecting the guides.
all_countries_beta_acute_healthy$pc12 +
  bgd_beta_acute_healthy$pc12 +
  mal_beta_acute_healthy$pc12 +
  nep_beta_acute_healthy$pc12 +
  plot_layout(guides = "collect")
Maaslin basics
# Use the full group names from the metadata file here, not the "presentation"
# names: they are used to read in the maaslin files, which were written using
# the full names.
groups_to_analyse <- c("Acute_Typhi", "Control_HealthySerosurvey")

# Variables per country.
bang_variables_for_analysis <- c(
  "Group",
  "Sex",
  "Age",
  "Antibiotics_taken_before_sampling_assumptions"
)
# Malawi uses the same variables as Bangladesh plus the sequencing lane.
mwi_variables_for_analysis <- c(bang_variables_for_analysis, "sequencing_lane")
nep_variables_for_analysis <- c(
  "Group",
  "Sex",
  "Age",
  "Antibiotics_taken_before_sampling_assumptions"
)
# Step 1: read in the per-country 'metaphlan' maaslin results.
bangladesh_taxonomic_maaslin <- read_in_maaslin(
  "Bangladesh", groups_to_analyse, bang_variables_for_analysis, "metaphlan"
)
malawi_taxonomic_maaslin <- read_in_maaslin(
  "Malawi", groups_to_analyse, mwi_variables_for_analysis, "metaphlan"
)
nepal_taxonomic_maaslin <- read_in_maaslin(
  "Nepal", groups_to_analyse, nep_variables_for_analysis, "metaphlan"
)

# Step 2: apply the shared filter to each country's results.
bangladesh_taxonomic_maaslin_filtered <-
  filter_taxonomic_maaslin(bangladesh_taxonomic_maaslin)
malawi_taxonomic_maaslin_filtered <-
  filter_taxonomic_maaslin(malawi_taxonomic_maaslin)
nepal_taxonomic_maaslin_filtered <-
  filter_taxonomic_maaslin(nepal_taxonomic_maaslin)

# Step 3: basic statistics for each country's filtered results.
bangladesh_maaslin_stats <- basic_maaslin_stats(
  bangladesh_taxonomic_maaslin_filtered,
  "Bangladesh",
  bang_variables_for_analysis,
  groups_to_analyse
)
malawi_maaslin_stats <- basic_maaslin_stats(
  malawi_taxonomic_maaslin_filtered,
  "Malawi",
  mwi_variables_for_analysis,
  groups_to_analyse
)
nepal_maaslin_stats <- basic_maaslin_stats(
  nepal_taxonomic_maaslin_filtered,
  "Nepal",
  nep_variables_for_analysis,
  groups_to_analyse
)
There were 92 species significantly (q-val < 0.05) associated with health/disease in Malawi, in Bangladesh, and in Nepal.
Combine the taxonomic MaAsLin results, and print the species that are significant at both sites and whose associations share the same direction.
Because sequencing run and participant type are totally confounded for Bangladesh, we need to exclude sequencing run from the final model for Bangladesh (otherwise it wipes out the signals).
associated at both sites
# Join the filtered Bangladesh and Malawi maaslin results; the two suffixes are
# passed in the same order as the list elements.
bang_mal <- list(
  bangladesh_taxonomic_maaslin_filtered,
  malawi_taxonomic_maaslin_filtered
)
combined_results <- run_inner_join_maaslins(
  bang_mal,
  c("_bang", "_mal"),
  mwi_variables_for_analysis,
  groups_to_analyse,
  "metaphlan",
  maaslin_taxonomic_output_root_folder
)
# View(combined_results$positive_coef)
# View(combined_results$negative_coef)
# Table of the `positive_coef` results from both sites.
# Keeps rows whose lowest taxonomic level starts with "s", drops the columns
# not needed for display, gives the remaining columns readable names, and
# rounds every numeric column to 3 significant figures (as text).
# across() replaces mutate_if(..., funs(...)); funs() is deprecated in dplyr.
combined_results$positive_coef %>%
  filter(grepl('^s', lowest_taxonomic_level)) %>%
  select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
  rename(
    Species = lowest_taxonomic_level,
    `Coefficient Bangladesh` = coef_bang,
    `Standard Error Bangladesh` = stderr_bang,
    `Q-value Bangladesh` = qval_bang,
    `Coefficient Malawi` = coef_mal,
    `Standard Error Malawi` = stderr_mal,
    `Q-value Malawi` = qval_mal
  ) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| feature | Species | Coefficient Bangladesh | Standard Error Bangladesh | Q-value Bangladesh | Coefficient Malawi | Standard Error Malawi | Q-value Malawi |
|---|---|---|---|---|---|---|---|
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_SGB6179 | s__Clostridium_SGB6179 | 8.79 | 1.34 | 4.25e-05 | 5.52 | 1.6 | 0.0491 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri_clade_A | s__Prevotella_copri_clade_A | 4.54 | 0.978 | 0.00971 | 10.1 | 1.98 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Negativicutes.o__Veillonellales.f__Veillonellaceae.g__GGB4266.s__GGB4266_SGB5809 | s__GGB4266_SGB5809 | 4.58 | 1.09 | 0.0147 | 8.73 | 1.29 | 7.64e-06 |
| k__Bacteria.p__Proteobacteria.c__Gammaproteobacteria.o__Pasteurellales.f__Pasteurellaceae.g__Haemophilus.s__Haemophilus_parainfluenzae | s__Haemophilus_parainfluenzae | 3.64 | 0.979 | 0.03 | 6.85 | 1.11 | 5e-05 |
# Table of the `negative_coef` results from both sites.
# Keeps rows whose lowest taxonomic level starts with "s", drops the columns
# not needed for display, gives the remaining columns readable names, and
# rounds every numeric column to 3 significant figures (as text).
# across() replaces mutate_if(..., funs(...)); funs() is deprecated in dplyr.
combined_results$negative_coef %>%
  filter(grepl('^s', lowest_taxonomic_level)) %>%
  select(!c(metadata, value, N_bang, N.not.0_bang, pval_bang, N_mal, N.not.0_mal, pval_mal)) %>%
  rename(
    Species = lowest_taxonomic_level,
    `Coefficient Bangladesh` = coef_bang,
    `Standard Error Bangladesh` = stderr_bang,
    `Q-value Bangladesh` = qval_bang,
    `Coefficient Malawi` = coef_mal,
    `Standard Error Malawi` = stderr_mal,
    `Q-value Malawi` = qval_mal
  ) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| feature | Species | Coefficient Bangladesh | Standard Error Bangladesh | Q-value Bangladesh | Coefficient Malawi | Standard Error Malawi | Q-value Malawi |
|---|---|---|---|---|---|---|---|
# TODO: since run_combine_maaslins was refactored, the species that are only
# associated at one site are no longer returned. Needs fixing.
# nrow(combined_results$mwi_maaslin_only)
# nrow(combined_results$bang_maaslin_only)

# Stack the positive- and negative-coefficient results from the combined analysis.
mal_bang_maaslins <- rbind(
  combined_results$positive_coef,
  combined_results$negative_coef
)
# View(combined_results$positive_coef )

# Anti-join each site's filtered results against the stacked table to get the
# associations found at that site only, then keep those with qval < 0.05.
bang_only <- bangladesh_taxonomic_maaslin_filtered %>%
  anti_join(mal_bang_maaslins, by = c("feature", "metadata", "value")) %>%
  filter(qval < 0.05)
mal_only <- malawi_taxonomic_maaslin_filtered %>%
  anti_join(mal_bang_maaslins, by = c("feature", "metadata", "value")) %>%
  filter(qval < 0.05)
# Table of the associations found in Bangladesh only, with readable column
# names and numeric columns rounded to 3 significant figures (as text).
# across() replaces mutate_if(..., funs(...)); funs() is deprecated in dplyr.
bang_only %>%
  rename(
    Species = lowest_taxonomic_level,
    `Coefficient` = coef,
    `Standard Error` = stderr,
    `Q-value` = qval
  ) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| feature | Species | metadata | value | Coefficient | Standard Error | N | N.not.0 | pval | Q-value |
|---|---|---|---|---|---|---|---|---|---|
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_naeslundii | s__Actinomyces_naeslundii | Group | Control_HealthySerosurvey | -3.27 | 0.543 | 80 | 48 | 5.7e-08 | 0.000216 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Micrococcales.f__Cellulomonadaceae.g__Cellulomonas.s__Cellulomonas_flavigena | s__Cellulomonas_flavigena | Group | Control_HealthySerosurvey | -1.59 | 0.307 | 80 | 11 | 1.69e-06 | 0.00256 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Blautia.s__Blautia_glucerasea | s__Blautia_glucerasea | Group | Control_HealthySerosurvey | -4.97 | 0.987 | 80 | 41 | 3.23e-06 | 0.00407 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_oris | s__Actinomyces_oris | Group | Control_HealthySerosurvey | -3.73 | 0.749 | 80 | 51 | 3.99e-06 | 0.00431 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_copri_clade_C | s__Prevotella_copri_clade_C | Group | Control_HealthySerosurvey | 4.05 | 0.846 | 80 | 65 | 8.35e-06 | 0.0079 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_dentalis | s__Actinomyces_dentalis | Group | Control_HealthySerosurvey | -3.42 | 0.728 | 80 | 37 | 1.15e-05 | 0.00968 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_graevenitzii | s__Actinomyces_graevenitzii | Group | Control_HealthySerosurvey | -4.99 | 1.07 | 80 | 37 | 1.4e-05 | 0.00971 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Eubacteriaceae.g__Eubacterium.s__Eubacterium_ramulus | s__Eubacterium_ramulus | Group | Control_HealthySerosurvey | -3.27 | 0.817 | 80 | 26 | 0.000144 | 0.0147 |
| k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Lactobacillaceae.g__Ligilactobacillus.s__Ligilactobacillus_ruminis | s__Ligilactobacillus_ruminis | Group | Control_HealthySerosurvey | 5.38 | 1.26 | 80 | 70 | 5.32e-05 | 0.0147 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Mediterraneibacter.s__Mediterraneibacter_butyricigenes | s__Mediterraneibacter_butyricigenes | Group | Control_HealthySerosurvey | -2.9 | 0.691 | 80 | 27 | 7.31e-05 | 0.0147 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Pauljensenia.s__Pauljensenia_hongkongensis | s__Pauljensenia_hongkongensis | Group | Control_HealthySerosurvey | -3.15 | 0.716 | 80 | 27 | 3.51e-05 | 0.0147 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_timonensis | s__Actinomyces_timonensis | Group | Control_HealthySerosurvey | -0.522 | 0.133 | 80 | 3 | 0.000188 | 0.0182 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_sp_oral_taxon_448 | s__Actinomyces_sp_oral_taxon_448 | Group | Control_HealthySerosurvey | -2.71 | 0.728 | 80 | 23 | 0.000385 | 0.03 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_massiliensis | s__Actinomyces_massiliensis | Group | Control_HealthySerosurvey | -3.14 | 0.854 | 80 | 32 | 0.000447 | 0.0338 |
| k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Streptococcaceae.g__Streptococcus.s__Streptococcus_SGB3665 | s__Streptococcus_SGB3665 | Group | Control_HealthySerosurvey | -1.39 | 0.38 | 80 | 6 | 0.000458 | 0.0343 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__GGB9627.s__GGB9627_SGB15081 | s__GGB9627_SGB15081 | Group | Control_HealthySerosurvey | -2.43 | 0.666 | 80 | 22 | 0.000486 | 0.0357 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospiraceae_unclassified.s__Lachnospiraceae_bacterium | s__Lachnospiraceae_bacterium | Group | Control_HealthySerosurvey | -3.05 | 0.838 | 80 | 68 | 0.000502 | 0.0366 |
| k__Bacteria.p__Firmicutes.c__Tissierellia.o__Tissierellales.f__Peptoniphilaceae.g__Peptoniphilus.s__Peptoniphilus_lacrimalis | s__Peptoniphilus_lacrimalis | Group | Control_HealthySerosurvey | -1.47 | 0.406 | 80 | 10 | 0.000536 | 0.0383 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Peptostreptococcaceae.g__Romboutsia.s__Romboutsia_timonensis | s__Romboutsia_timonensis | Group | Control_HealthySerosurvey | 4.52 | 1.25 | 80 | 66 | 0.000545 | 0.0386 |
# Table of the associations found in Malawi only, with readable column names
# and numeric columns rounded to 3 significant figures (as text).
# across() replaces mutate_if(..., funs(...)); funs() is deprecated in dplyr.
mal_only %>%
  rename(
    Species = lowest_taxonomic_level,
    `Coefficient` = coef,
    `Standard Error` = stderr,
    `Q-value` = qval
  ) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| feature | Species | metadata | value | Coefficient | Standard Error | N | N.not.0 | pval | Q-value |
|---|---|---|---|---|---|---|---|---|---|
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Phocaeicola.s__Phocaeicola_massiliensis | s__Phocaeicola_massiliensis | Group | Control_HealthySerosurvey | 7.41 | 0.389 | 63 | 42 | 2.21e-25 | 3.93e-21 |
| k__Bacteria.p__Proteobacteria.c__Betaproteobacteria.o__Burkholderiales.f__Sutterellaceae.g__GGB6565.s__GGB6565_SGB9274 | s__GGB6565_SGB9274 | Group | Control_HealthySerosurvey | 7.28 | 0.433 | 63 | 43 | 6.81e-23 | 4.85e-19 |
| k__Bacteria.p__Proteobacteria.c__Deltaproteobacteria.o__Desulfovibrionales.f__Desulfovibrionaceae.g__Desulfovibrio.s__Desulfovibrio_SGB5077 | s__Desulfovibrio_SGB5077 | Group | Control_HealthySerosurvey | 8.33 | 0.614 | 63 | 44 | 7.07e-19 | 3.59e-15 |
| k__Bacteria.p__Proteobacteria.c__Betaproteobacteria.o__Burkholderiales.f__Sutterellaceae.g__Duodenibacillus.s__Duodenibacillus_massiliensis | s__Duodenibacillus_massiliensis | Group | Control_HealthySerosurvey | 9.52 | 0.754 | 63 | 43 | 1.33e-17 | 4.73e-14 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Phocaeicola.s__Phocaeicola_plebeius | s__Phocaeicola_plebeius | Group | Control_HealthySerosurvey | 8.13 | 0.705 | 63 | 42 | 4.59e-16 | 1.36e-12 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lacrimispora.s__Lacrimispora_amygdalina | s__Lacrimispora_amygdalina | Group | Control_HealthySerosurvey | 5.89 | 0.548 | 63 | 45 | 6.16e-15 | 1.46e-11 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Coprococcus.s__Coprococcus_SGB4580 | s__Coprococcus_SGB4580 | Group | Control_HealthySerosurvey | 4.56 | 0.468 | 63 | 47 | 2.03e-13 | 4.02e-10 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospira.s__Lachnospira_sp_NSJ_43 | s__Lachnospira_sp_NSJ_43 | Group | Control_HealthySerosurvey | 4.07 | 0.441 | 63 | 41 | 1.36e-12 | 2.42e-09 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Anaerostipes.s__Anaerostipes_SGB4708 | s__Anaerostipes_SGB4708 | Group | Control_HealthySerosurvey | 8.32 | 0.93 | 63 | 46 | 3.67e-12 | 5.93e-09 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Phocaeicola.s__Phocaeicola_coprocola | s__Phocaeicola_coprocola | Group | Control_HealthySerosurvey | 7.75 | 0.922 | 63 | 41 | 2.61e-11 | 3.87e-08 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_sp_AM33_3 | s__Clostridium_sp_AM33_3 | Group | Control_HealthySerosurvey | 8.75 | 1.08 | 63 | 47 | 7.08e-11 | 9.69e-08 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__GGB9614.s__GGB9614_SGB15049 | s__GGB9614_SGB15049 | Group | Control_HealthySerosurvey | 3.9 | 0.497 | 63 | 43 | 2.01e-10 | 2.46e-07 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Eubacteriales_unclassified.g__Eubacteriales_unclassified.s__Eubacteriales_unclassified_SGB15145 | s__Eubacteriales_unclassified_SGB15145 | Group | Control_HealthySerosurvey | 6.23 | 0.802 | 63 | 41 | 2.64e-10 | 3.04e-07 |
| k__Bacteria.p__Firmicutes.c__Erysipelotrichia.o__Erysipelotrichales.f__Erysipelotrichaceae.g__Faecalibacillus.s__Faecalibacillus_intestinalis | s__Faecalibacillus_intestinalis | Group | Control_HealthySerosurvey | 10.5 | 1.38 | 63 | 53 | 3.81e-10 | 4.11e-07 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridiaceae_unclassified.s__Clostridiaceae_bacterium_Marseille_Q4145 | s__Clostridiaceae_bacterium_Marseille_Q4145 | Group | Control_HealthySerosurvey | 5.57 | 0.737 | 63 | 45 | 5.74e-10 | 5.84e-07 |
| k__Bacteria.p__Firmicutes.c__CFGB15212.o__OFGB15212.f__FGB15212.g__GGB41458.s__GGB41458_SGB58520 | s__GGB41458_SGB58520 | Group | Control_HealthySerosurvey | 4.74 | 0.66 | 63 | 44 | 2.27e-09 | 1.97e-06 |
| k__Bacteria.p__Proteobacteria.c__Betaproteobacteria.o__Burkholderiales.f__Sutterellaceae.g__Sutterella.s__Sutterella_wadsworthensis | s__Sutterella_wadsworthensis | Group | Control_HealthySerosurvey | 8.97 | 1.28 | 63 | 46 | 4.6e-09 | 3.9e-06 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridiaceae_unclassified.s__Clostridiaceae_bacterium_Marseille_Q4143 | s__Clostridiaceae_bacterium_Marseille_Q4143 | Group | Control_HealthySerosurvey | 6.68 | 0.979 | 63 | 47 | 8.71e-09 | 7.05e-06 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospiraceae_unclassified.s__Lachnospiraceae_bacterium_AM48_27BH | s__Lachnospiraceae_bacterium_AM48_27BH | Group | Control_HealthySerosurvey | 4.33 | 0.64 | 63 | 44 | 1.05e-08 | 7.64e-06 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospira.s__Lachnospira_pectinoschiza | s__Lachnospira_pectinoschiza | Group | Control_HealthySerosurvey | 7.18 | 1.06 | 63 | 42 | 1.14e-08 | 7.95e-06 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridiaceae_unclassified.s__Clostridiaceae_bacterium_AF18_31LB | s__Clostridiaceae_bacterium_AF18_31LB | Group | Control_HealthySerosurvey | 4.69 | 0.706 | 63 | 49 | 1.72e-08 | 1.15e-05 |
| k__Bacteria.p__Firmicutes.c__Firmicutes_unclassified.o__Firmicutes_unclassified.f__Firmicutes_unclassified.g__GGB9511.s__GGB9511_SGB14908 | s__GGB9511_SGB14908 | Group | Control_HealthySerosurvey | 6.13 | 0.934 | 63 | 45 | 2.32e-08 | 1.47e-05 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_eggerthii | s__Bacteroides_eggerthii | Group | Control_HealthySerosurvey | 4.54 | 0.723 | 63 | 41 | 6.55e-08 | 3.76e-05 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Mediterraneibacter.s__Ruminococcus_lactaris | s__Ruminococcus_lactaris | Group | Control_HealthySerosurvey | 8.9 | 1.43 | 63 | 49 | 8.14e-08 | 4.2e-05 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_sp_AM22_11AC | s__Clostridium_sp_AM22_11AC | Group | Control_HealthySerosurvey | 8.74 | 1.4 | 63 | 52 | 7.6e-08 | 4.2e-05 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_intestinalis | s__Roseburia_intestinalis | Group | Control_HealthySerosurvey | 8.59 | 1.38 | 63 | 45 | 8.1e-08 | 4.2e-05 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Blautia.s__Blautia_stercoris | s__Blautia_stercoris | Group | Control_HealthySerosurvey | 9.71 | 1.58 | 63 | 50 | 1.01e-07 | 4.92e-05 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_caccae | s__Bacteroides_caccae | Group | Control_HealthySerosurvey | 5.55 | 0.936 | 63 | 46 | 2.37e-07 | 9.6e-05 |
| k__Bacteria.p__Firmicutes.c__CFGB1422.o__OFGB1422.f__FGB1422.g__GGB3486.s__GGB3486_SGB4658 | s__GGB3486_SGB4658 | Group | Control_HealthySerosurvey | 5.69 | 0.96 | 63 | 41 | 2.37e-07 | 9.6e-05 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_fessum | s__Clostridium_fessum | Group | Control_HealthySerosurvey | 4.55 | 0.779 | 63 | 50 | 3.15e-07 | 0.000119 |
| k__Bacteria.p__Proteobacteria.c__Deltaproteobacteria.o__Desulfovibrionales.f__Desulfovibrionaceae.g__Bilophila.s__Bilophila_SGB15450 | s__Bilophila_SGB15450 | Group | Control_HealthySerosurvey | 5.3 | 0.929 | 63 | 40 | 5.22e-07 | 0.00019 |
| k__Bacteria.p__Bacteroidetes.c__CFGB629.o__OFGB629.f__FGB629.g__GGB1495.s__GGB1495_SGB2071 | s__GGB1495_SGB2071 | Group | Control_HealthySerosurvey | 6.84 | 1.21 | 63 | 50 | 6.4e-07 | 0.000215 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_sp_AF12_28 | s__Clostridium_sp_AF12_28 | Group | Control_HealthySerosurvey | 3.95 | 0.71 | 63 | 45 | 8.78e-07 | 0.000281 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_faecis | s__Roseburia_faecis | Group | Control_HealthySerosurvey | 7.65 | 1.38 | 63 | 49 | 9.66e-07 | 0.000304 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Eubacteriaceae.g__Eubacterium.s__Eubacterium_sp_OM08_24 | s__Eubacterium_sp_OM08_24 | Group | Control_HealthySerosurvey | 6.1 | 1.11 | 63 | 47 | 1.24e-06 | 0.000383 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Agathobaculum.s__Agathobaculum_butyriciproducens | s__Agathobaculum_butyriciproducens | Group | Control_HealthySerosurvey | 4.45 | 0.825 | 63 | 55 | 1.65e-06 | 0.00049 |
| k__Bacteria.p__Firmicutes.c__Negativicutes.o__Veillonellales.f__Veillonellaceae.g__Veillonella.s__Veillonella_dispar | s__Veillonella_dispar | Group | Control_HealthySerosurvey | 4.7 | 0.883 | 63 | 45 | 2.12e-06 | 0.000609 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_perfringens | s__Clostridium_perfringens | Group | Control_HealthySerosurvey | 6.21 | 1.18 | 63 | 47 | 2.46e-06 | 0.00068 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Faecalicatena.s__Faecalicatena_fissicatena | s__Faecalicatena_fissicatena | Group | Control_HealthySerosurvey | 8.04 | 1.55 | 63 | 52 | 3.52e-06 | 0.000936 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Roseburia.s__Roseburia_inulinivorans | s__Roseburia_inulinivorans | Group | Control_HealthySerosurvey | 6.24 | 1.22 | 63 | 58 | 4.17e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__GGB9616.s__GGB9616_SGB15052 | s__GGB9616_SGB15052 | Group | Control_HealthySerosurvey | 3.83 | 0.749 | 63 | 47 | 4.4e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Negativicutes.o__Veillonellales.f__Veillonellaceae.g__Veillonella.s__Veillonella_atypica | s__Veillonella_atypica | Group | Control_HealthySerosurvey | 6.27 | 1.22 | 63 | 44 | 4.23e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__CFGB76639.o__OFGB76639.f__FGB76639.g__GGB2658.s__GGB2658_SGB3579 | s__GGB2658_SGB3579 | Group | Control_HealthySerosurvey | -0.321 | 0.0628 | 63 | 1 | 4.5e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__GGB3619.s__GGB3619_SGB4895 | s__GGB3619_SGB4895 | Group | Control_HealthySerosurvey | -0.321 | 0.0628 | 63 | 1 | 4.5e-06 | 0.000989 |
| k__Bacteria.p__Bacteroidetes.c__CFGB76185.o__OFGB76185.f__FGB76185.g__GGB1550.s__GGB1550_SGB2134 | s__GGB1550_SGB2134 | Group | Control_HealthySerosurvey | -2.53 | 0.492 | 63 | 3 | 3.98e-06 | 0.000989 |
| k__Bacteria.p__Actinobacteria.c__Coriobacteriia.o__Eggerthellales.f__Eggerthellaceae.g__Paraeggerthella.s__Paraeggerthella_hongkongensis | s__Paraeggerthella_hongkongensis | Group | Control_HealthySerosurvey | -0.321 | 0.0628 | 63 | 1 | 4.5e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Christensenellaceae.g__Christensenella.s__Christensenella_minuta | s__Christensenella_minuta | Group | Control_HealthySerosurvey | -0.321 | 0.0628 | 63 | 1 | 4.5e-06 | 0.000989 |
| k__Bacteria.p__Firmicutes.c__Firmicutes_unclassified.o__Firmicutes_unclassified.f__Firmicutes_unclassified.g__Firmicutes_unclassified.s__Firmicutes_bacterium_AF16_15 | s__Firmicutes_bacterium_AF16_15 | Group | Control_HealthySerosurvey | 5.44 | 1.08 | 63 | 50 | 5.57e-06 | 0.00119 |
| k__Bacteria.p__Actinobacteria.c__Coriobacteriia.o__Eggerthellales.f__Eggerthellaceae.g__GGB9420.s__GGB9420_SGB14786 | s__GGB9420_SGB14786 | Group | Control_HealthySerosurvey | -1.28 | 0.258 | 63 | 2 | 7.42e-06 | 0.00144 |
| k__Bacteria.p__Firmicutes.c__Negativicutes.o__Veillonellales.f__Veillonellaceae.g__Veillonella.s__Veillonella_tobetsuensis | s__Veillonella_tobetsuensis | Group | Control_HealthySerosurvey | 5.01 | 1.02 | 63 | 41 | 9.5e-06 | 0.00174 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Coprococcus.s__Coprococcus_SGB4669 | s__Coprococcus_SGB4669 | Group | Control_HealthySerosurvey | 7.1 | 1.46 | 63 | 54 | 1.02e-05 | 0.00185 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_sp_AF27_2AA | s__Clostridium_sp_AF27_2AA | Group | Control_HealthySerosurvey | 6.05 | 1.25 | 63 | 44 | 1.09e-05 | 0.00196 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Prevotellaceae.g__Prevotella.s__Prevotella_SGB1680 | s__Prevotella_SGB1680 | Group | Control_HealthySerosurvey | -1.58 | 0.33 | 63 | 3 | 1.33e-05 | 0.00229 |
| k__Bacteria.p__Firmicutes.c__CFGB72899.o__OFGB72899.f__FGB72899.g__GGB4608.s__GGB4608_SGB6382 | s__GGB4608_SGB6382 | Group | Control_HealthySerosurvey | -4.77 | 0.997 | 63 | 3 | 1.4e-05 | 0.00238 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_paraputrificum | s__Clostridium_paraputrificum | Group | Control_HealthySerosurvey | 6.51 | 1.36 | 63 | 42 | 1.44e-05 | 0.00243 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Enterocloster.s__Enterocloster_bolteae | s__Enterocloster_bolteae | Group | Control_HealthySerosurvey | -2.19 | 0.459 | 63 | 5 | 1.49e-05 | 0.00247 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_saudiense | s__Clostridium_saudiense | Group | Control_HealthySerosurvey | 4.96 | 1.1 | 63 | 48 | 3.56e-05 | 0.00499 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Blautia.s__Blautia_obeum | s__Blautia_obeum | Group | Control_HealthySerosurvey | 4.32 | 0.974 | 63 | 60 | 4.63e-05 | 0.0062 |
| k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Streptococcaceae.g__Streptococcus.s__Streptococcus_salivarius | s__Streptococcus_salivarius | Group | Control_HealthySerosurvey | 8.23 | 1.86 | 63 | 55 | 4.68e-05 | 0.00622 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Ruminococcus.s__Ruminococcus_bromii | s__Ruminococcus_bromii | Group | Control_HealthySerosurvey | -7.54 | 1.72 | 63 | 18 | 5.56e-05 | 0.00712 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Mediterraneibacter.s__Ruminococcus_gnavus | s__Ruminococcus_gnavus | Group | Control_HealthySerosurvey | 7.4 | 1.72 | 63 | 46 | 7.39e-05 | 0.00903 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_celatum | s__Clostridium_celatum | Group | Control_HealthySerosurvey | 4.8 | 1.13 | 63 | 41 | 8.23e-05 | 0.0099 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Faecalibacterium.s__Faecalibacterium_prausnitzii | s__Faecalibacterium_prausnitzii | Group | Control_HealthySerosurvey | 5.53 | 1.3 | 63 | 60 | 8.7e-05 | 0.0102 |
| k__Bacteria.p__Firmicutes.c__CFGB1311.o__OFGB1311.f__FGB1311.g__GGB3141.s__GGB3141_SGB4154 | s__GGB3141_SGB4154 | Group | Control_HealthySerosurvey | -4.04 | 0.951 | 63 | 6 | 8.76e-05 | 0.0102 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Faecalibacterium.s__Faecalibacterium_SGB15346 | s__Faecalibacterium_SGB15346 | Group | Control_HealthySerosurvey | 5.98 | 1.42 | 63 | 58 | 0.000104 | 0.0119 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Dorea.s__Dorea_formicigenerans | s__Dorea_formicigenerans | Group | Control_HealthySerosurvey | 3.68 | 0.878 | 63 | 58 | 0.000106 | 0.0121 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_salyersiae | s__Bacteroides_salyersiae | Group | Control_HealthySerosurvey | 3.05 | 0.728 | 63 | 42 | 0.000108 | 0.0122 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Clostridium.s__Clostridium_sp_AF20_17LB | s__Clostridium_sp_AF20_17LB | Group | Control_HealthySerosurvey | 2.47 | 0.597 | 63 | 45 | 0.000128 | 0.0135 |
| k__Bacteria.p__Firmicutes.c__Erysipelotrichia.o__Erysipelotrichales.f__Turicibacteraceae.g__Turicibacter.s__Turicibacter_sanguinis | s__Turicibacter_sanguinis | Group | Control_HealthySerosurvey | 4.77 | 1.17 | 63 | 44 | 0.000154 | 0.0154 |
| k__Bacteria.p__Firmicutes.c__Erysipelotrichia.o__Erysipelotrichales.f__Coprobacillaceae.g__Coprobacillus.s__Coprobacillus_cateniformis | s__Coprobacillus_cateniformis | Group | Control_HealthySerosurvey | -1.61 | 0.401 | 63 | 3 | 0.00019 | 0.0179 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Tannerellaceae.g__Parabacteroides.s__Parabacteroides_merdae | s__Parabacteroides_merdae | Group | Control_HealthySerosurvey | 5.44 | 1.38 | 63 | 42 | 0.000227 | 0.0204 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Oscillospiraceae_unclassified.s__Oscillospiraceae_unclassified_SGB15257 | s__Oscillospiraceae_unclassified_SGB15257 | Group | Control_HealthySerosurvey | 6.88 | 1.74 | 63 | 50 | 0.000234 | 0.0208 |
| k__Bacteria.p__Actinobacteria.c__Coriobacteriia.o__Coriobacteriales.f__Coriobacteriaceae.g__Collinsella.s__Collinsella_aerofaciens | s__Collinsella_aerofaciens | Group | Control_HealthySerosurvey | 6.12 | 1.56 | 63 | 56 | 0.000256 | 0.022 |
| k__Archaea.p__Thaumarchaeota.c__Thaumarchaeota_unclassified.o__Nitrosopumilales.f__Nitrosopumilaceae.g__Nitrosopumilus.s__Nitrosopumilus_SGB14899 | s__Nitrosopumilus_SGB14899 | Group | Control_HealthySerosurvey | -2.08 | 0.533 | 63 | 8 | 0.000271 | 0.0223 |
| k__Bacteria.p__Firmicutes.c__CFGB1473.o__OFGB1473.f__FGB1473.g__GGB3730.s__GGB3730_SGB5060 | s__GGB3730_SGB5060 | Group | Control_HealthySerosurvey | -3.14 | 0.82 | 63 | 9 | 0.000337 | 0.0229 |
| k__Bacteria.p__Firmicutes.c__Bacilli.o__Lactobacillales.f__Streptococcaceae.g__Lactococcus.s__Lactococcus_garvieae | s__Lactococcus_garvieae | Group | Control_HealthySerosurvey | -1.38 | 0.358 | 63 | 3 | 0.000316 | 0.0229 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Corynebacteriales.f__Corynebacteriaceae.g__Corynebacterium.s__Corynebacterium_propinquum | s__Corynebacterium_propinquum | Group | Control_HealthySerosurvey | 0.879 | 0.228 | 63 | 2 | 0.000319 | 0.0229 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Mediterraneibacter.s__Ruminococcus_torques | s__Ruminococcus_torques | Group | Control_HealthySerosurvey | 4.38 | 1.15 | 63 | 60 | 0.000352 | 0.0231 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Lachnospiraceae.g__Lachnospiraceae_unclassified.s__Lachnospiraceae_bacterium_NSJ_46 | s__Lachnospiraceae_bacterium_NSJ_46 | Group | Control_HealthySerosurvey | 3.48 | 0.928 | 63 | 49 | 0.00043 | 0.0265 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_congonensis | s__Bacteroides_congonensis | Group | Control_HealthySerosurvey | 4.33 | 1.17 | 63 | 30 | 0.000501 | 0.0289 |
| k__Bacteria.p__Actinobacteria.c__Actinomycetia.o__Actinomycetales.f__Actinomycetaceae.g__Actinomyces.s__Actinomyces_sp_HMSC035G02 | s__Actinomyces_sp_HMSC035G02 | Group | Control_HealthySerosurvey | -1.48 | 0.401 | 63 | 5 | 0.000542 | 0.0308 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__GGB9737.s__GGB9737_SGB15309 | s__GGB9737_SGB15309 | Group | Control_HealthySerosurvey | 7.27 | 1.98 | 63 | 48 | 0.000554 | 0.0313 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Clostridiaceae.g__Butyricicoccus.s__Butyricicoccus_sp_AM29_23AC | s__Butyricicoccus_sp_AM29_23AC | Group | Control_HealthySerosurvey | 5.51 | 1.52 | 63 | 52 | 0.000658 | 0.0352 |
| k__Bacteria.p__Firmicutes.c__CFGB3068.o__OFGB3068.f__FGB3068.g__GGB9760.s__GGB9760_SGB15372 | s__GGB9760_SGB15372 | Group | Control_HealthySerosurvey | 4.1 | 1.14 | 63 | 54 | 0.000691 | 0.0363 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Oscillospiraceae.g__Ruminococcus.s__Ruminococcus_sp_NSJ_13 | s__Ruminococcus_sp_NSJ_13 | Group | Control_HealthySerosurvey | 5.19 | 1.44 | 63 | 52 | 0.000725 | 0.0372 |
| k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Rikenellaceae.g__Alistipes.s__Alistipes_putredinis | s__Alistipes_putredinis | Group | Control_HealthySerosurvey | 4.42 | 1.25 | 63 | 51 | 0.000839 | 0.0415 |
| k__Bacteria.p__Firmicutes.c__Clostridia.o__Eubacteriales.f__Eubacteriales_Family_XIII_Incertae_Sedis.g__GGB2977.s__GGB2977_SGB3959 | s__GGB2977_SGB3959 | Group | Control_HealthySerosurvey | -2.51 | 0.709 | 63 | 13 | 0.000849 | 0.0415 |
| k__Bacteria.p__Firmicutes.c__CFGB15209.o__OFGB15209.f__FGB15209.g__GGB9293.s__GGB9293_SGB14250 | s__GGB9293_SGB14250 | Group | Control_HealthySerosurvey | -2.82 | 0.802 | 63 | 15 | 0.000921 | 0.0434 |
There were species significantly (q-val < 0.05) associated with health/disease in Malawi only and in Bangladesh only.
Species associated at only one site are written out to a file, where they can be inspected manually.
Setting up the forest plots. Need to combine all three maaslin outputs, and then add in the patch data.
# Build the table behind the forest plots: join the Bangladesh and Malawi
# maaslin results first, then join the Nepal results onto that.
combined_maaslins <- inner_join_maaslins(bangladesh_taxonomic_maaslin, malawi_taxonomic_maaslin, '_bang', '_mal', 'metaphlan')
# NOTE(review): 'not' / 'used' look like placeholder suffixes; the Nepal
# statistics come back unsuffixed and are renamed just below. Confirm against
# inner_join_maaslins.
combined_maaslins <- inner_join_maaslins(combined_maaslins, nepal_taxonomic_maaslin, 'not', 'used', 'metaphlan')
# Give the (still unsuffixed) Nepal statistic columns a "_nep" suffix.
combined_maaslins <- combined_maaslins %>%
  rename_with(~ paste0(.x, '_nep'), c("coef", "stderr", "N", "N.not.0", "pval", "qval"))

# Read the patch maaslin results.
patch_taxonomic_maaslin <- read_tsv(file.path(patch_maaslin_taxonomic_output_root_folder, 'baseline_typhi_species', 'all_results.tsv'))
# The lowest taxonomic level is the last "."-separated token of the feature
# name. vapply() guarantees a character vector, even for zero rows, where
# sapply() would return an empty list.
patch_taxonomic_maaslin$lowest_taxonomic_level <- vapply(
  str_split(patch_taxonomic_maaslin$feature, "\\."),
  function(tokens) tokens[length(tokens)],
  character(1)
)
# Reshape the patch results so they line up with the other maaslin outputs:
#   * suffix the statistic columns with "_patch";
#   * keep only the 'Diagnosis' associations;
#   * relabel metadata 'Diagnosis' -> 'Group' (every remaining row is
#     'Diagnosis' after the filter, so a plain assignment is enough);
#   * relabel value 'no_disease' -> 'Control_HealthySerosurvey'.
patch_taxonomic_maaslin <- patch_taxonomic_maaslin %>%
  relocate(lowest_taxonomic_level, .after = feature) %>%
  rename_with(~ paste0(.x, '_patch'), c("coef", "stderr", "N", "N.not.0", "pval", "qval")) %>%
  filter(metadata == 'Diagnosis') %>%
  mutate(
    metadata = 'Group',
    value = ifelse(value == 'no_disease', 'Control_HealthySerosurvey', value)
  )

# Join the patch data to the combined maaslin data; left join, so species
# without a patch result are kept with NA in the "_patch" columns.
combined_maaslins <- combined_maaslins %>%
  left_join(patch_taxonomic_maaslin, by = c("feature", 'metadata', 'value', 'lowest_taxonomic_level'))
For the forest plot, I also want to include the per-cohort median abundances for the species of interest.
# Participant groups (using the relabelled Group values) to summarise for the
# forest plot.
groups_to_analyse <- c(
  "Acute typhoid",
  "Household contact"
)
# Per-group summary of the species-level MetaPhlAn table.
# NOTE(review): what get_prevalence() returns is not visible here; the prose
# above describes it as per-cohort abundance medians — confirm.
prevalence <- get_prevalence(
  strataa_metaphlan_data_species,
  groups_to_analyse
)
do the forest plot
# Inputs to the forest plot:
#   * prevalence        - the relative abundance data;
#   * species vector    - the species significantly associated with
#                         health/disease in two countries;
#   * combined_maaslins - the maaslin output holding the associations.
run_forest_plot(
  prevalence,
  c(
    "s__Prevotella_copri_clade_A",
    "s__Clostridium_SGB6179",
    "s__GGB4266_SGB5809",
    "s__Haemophilus_parainfluenzae"
  ),
  combined_maaslins
)
# Reshape the full MetaPhlAn table to long format: one row per
# (feature, sample) pair, with the sample's value stored in `prevalence`.
strataa_metaphlan_data_longer <- strataa_metaphlan_data %>%
  mutate(feature = rownames(strataa_metaphlan_data)) %>%
  pivot_longer(
    !c(feature, lowest_taxonomic_level),
    names_to = "SampleID",
    values_to = "prevalence"
  )
# View(head(strataa_metaphlan_data_longer))

# Attach the per-sample metadata, matching on SampleID.
strataa_metaphlan_data_longer_meta <- left_join(
  strataa_metaphlan_data_longer,
  metadata,
  by = "SampleID"
)

# One plot object per species of interest. They are only stored here; the
# lines that would display them are commented out below.
pc <- run_plot_species_of_interest(
  strataa_metaphlan_data_longer_meta,
  "s__Prevotella_copri_clade_A",
  participant_group_colours
)
cs <- run_plot_species_of_interest(
  strataa_metaphlan_data_longer_meta,
  "s__Clostridium_SGB6179",
  participant_group_colours
)
SGB5809 <- run_plot_species_of_interest(
  strataa_metaphlan_data_longer_meta,
  "s__GGB4266_SGB5809",
  participant_group_colours
)
hp <- run_plot_species_of_interest(
  strataa_metaphlan_data_longer_meta,
  "s__Haemophilus_parainfluenzae",
  participant_group_colours
)
# rt <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Romboutsia_timonensis', participant_group_colours)
# lb <- run_plot_species_of_interest(strataa_metaphlan_data_longer_meta, 's__Lachnospiraceae_bacterium', participant_group_colours)
# pc
# pc + cs + SGB5809 + hp + rt + lb

# Phylum-level abundance plots for the high Vi-titre vs household-contact
# comparison. Bangladesh is left commented out.
# metadata_for_phyla_plots <- metadata %>% dplyr::select(SampleID, Group, Country)
phyla_clean_metadata <- prep_data_to_plot_phyla(
  strataa_metaphlan_data,
  metadata
)
order_of_groups <- c("High Vi-titre", "Household contact")
# bangladesh_phyla_plot <- plot_per_country_abundance(phyla_clean_metadata = phyla_clean_metadata, country = "Bangladesh", group_order = order_of_groups)
# bangladesh_phyla_plot
malawi_phyla_plot <- plot_per_country_abundance(
  phyla_clean_metadata = phyla_clean_metadata,
  country = "Malawi",
  group_order = order_of_groups
)
nepal_phyla_plot <- plot_per_country_abundance(
  phyla_clean_metadata = phyla_clean_metadata,
  country = "Nepal",
  group_order = order_of_groups
)
# bangladesh_phyla_plot /
# Stack Malawi over Nepal and collect the legends into one.
(malawi_phyla_plot / nepal_phyla_plot) +
  plot_layout(guides = "collect")
Alpha diversity - all countries, healthy and carrier
# Alpha diversity for high Vi-titre vs household-contact participants in
# Malawi and Nepal. participant_group_colours is passed by name, matching the
# acute-vs-healthy call earlier in the file.
all_countries_healthy_carrier_alpha <- metaphlan_alpha(
  strataa_metaphlan_data_species,
  metadata,
  countries_of_interest = c('Malawi', 'Nepal'),
  groups_of_interest = c('High Vi-titre', 'Household contact'),
  comparisons = list(c('High Vi-titre', 'Household contact')),
  participant_group_colours = participant_group_colours
)
# Print the `alpha_by_country` element of the result.
all_countries_healthy_carrier_alpha$alpha_by_country
# Render the ANOVA summary with every numeric column rounded to 3 significant
# figures and converted to character. across() replaces the deprecated
# mutate_if() + funs() combination.
all_countries_healthy_carrier_alpha$alpha_anova_summary_with_var_names %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ as.character(signif(.x, 3)))) %>%
  kbl() %>%
  kable_styling()
| rownames(alpha_anova_summary[[1]]) | Df | Sum.Sq | Mean.Sq | F.value | Pr..F. | is_it_significant |
|---|---|---|---|---|---|---|
| Country:Group | 1 | 9.7 | 9.7 | 32.3 | 1.12e-07 | significant |
| Group | 1 | 1.65 | 1.65 | 5.49 | 0.0209 | not_significant |
| Group:Age | 1 | 1.65 | 1.65 | 5.49 | 0.021 | not_significant |
| Country | 1 | 1.32 | 1.32 | 4.38 | 0.0387 | not_significant |
| Country:Sex:Age | 1 | 0.394 | 0.394 | 1.31 | 0.255 | not_significant |
| Sex:Group | 1 | 0.382 | 0.382 | 1.27 | 0.262 | not_significant |
| Country:Group:Age | 1 | 0.254 | 0.254 | 0.844 | 0.36 | not_significant |
| Sex | 1 | 0.0687 | 0.0687 | 0.229 | 0.634 | not_significant |
| Age | 1 | 0.0611 | 0.0611 | 0.203 | 0.653 | not_significant |
| Country:Sex:Group:Age | 1 | 0.0412 | 0.0412 | 0.137 | 0.712 | not_significant |
| Country:Age | 1 | 0.0393 | 0.0393 | 0.131 | 0.718 | not_significant |
| Country:Sex:Group | 1 | 0.0326 | 0.0326 | 0.108 | 0.743 | not_significant |
| Sex:Group:Age | 1 | 0.0323 | 0.0323 | 0.107 | 0.744 | not_significant |
| Country:Sex | 1 | 0.00583 | 0.00583 | 0.0194 | 0.89 | not_significant |
| Sex:Age | 1 | 0.000729 | 0.000729 | 0.00242 | 0.961 | not_significant |
| Residuals | 110 | 33.1 | 0.301 | NA | NA | NA |
# Auto-print the `alpha_plot_group` element of the alpha-diversity result
# (presumably the plot of alpha diversity by participant group — confirm
# against metaphlan_alpha).
all_countries_healthy_carrier_alpha$alpha_plot_group
High Vi-titre vs healthy.
# Beta diversity for high Vi-titre vs household-contact participants, with
# Malawi and Nepal analysed together.
all_countries_beta_carrier_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Malawi", "Nepal"),
  c("High Vi-titre", "Household contact"),
  participant_group_colours
)
# Render the `pn_res` results table.
kable_styling(kbl(all_countries_beta_carrier_healthy$pn_res))
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.0861708 | 0.0009990 | significant |
| Group:Age | 0.0142600 | 0.0199800 | not_significant |
| Age | 0.0115688 | 0.0639361 | not_significant |
| Sex:Age | 0.0111861 | 0.0839161 | not_significant |
| Sex | 0.0082620 | 0.2517483 | not_significant |
| Sex:Group:Age | 0.0078819 | 0.3546454 | not_significant |
| Sex:Group | 0.0070955 | 0.4265734 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.8535749 | NA | NA |
# Bangladesh is left out of the carrier comparison; the call is kept for
# reference only.
# bgd_beta_carrier_healthy <- strataa_metaphlan_beta(strataa_metaphlan_data_species, metadata, c('Bangladesh'), c('High Vi-titre', 'Household contact'))
# bgd_beta_carrier_healthy$pn_res %>% kbl %>% kable_styling()

# Same beta-diversity analysis, restricted to Malawi.
mal_beta_carrier_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Malawi"),
  c("High Vi-titre", "Household contact"),
  participant_group_colours
)
# Render the `pn_res` results table.
kable_styling(kbl(mal_beta_carrier_healthy$pn_res))
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.2418944 | 0.0009990 | significant |
| Sex:Age | 0.0229140 | 0.0079920 | significant |
| Age | 0.0164550 | 0.0819181 | not_significant |
| Group:Age | 0.0127858 | 0.1598402 | not_significant |
| Sex | 0.0131844 | 0.1638362 | not_significant |
| Sex:Group | 0.0105540 | 0.3036963 | not_significant |
| Sex:Group:Age | 0.0051988 | 0.8701299 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.6770134 | NA | NA |
# Same beta-diversity analysis, restricted to Nepal.
nep_beta_carrier_healthy <- strataa_metaphlan_beta(
  strataa_metaphlan_data_species,
  metadata,
  c("Nepal"),
  c("High Vi-titre", "Household contact"),
  participant_group_colours
)
# Render the `pn_res` results table.
kable_styling(kbl(nep_beta_carrier_healthy$pn_res))
| | R2 | Pr(>F) | is_it_significant |
|---|---|---|---|
| Group | 0.0499326 | 0.0009990 | significant |
| Sex:Group | 0.0246972 | 0.2397602 | not_significant |
| Sex | 0.0219270 | 0.4055944 | not_significant |
| Age | 0.0212371 | 0.4525475 | not_significant |
| Sex:Age | 0.0206648 | 0.4685315 | not_significant |
| Sex:Group:Age | 0.0178023 | 0.7562438 | not_significant |
| Group:Age | 0.0168350 | 0.8241758 | not_significant |
| Total | 1.0000000 | NA | NA |
| Residual | 0.8269040 | NA | NA |
# Malawi and Nepal `pc12` plots side by side, sharing one legend.
mal_beta_carrier_healthy$pc12 +
  nep_beta_carrier_healthy$pc12 +
  plot_layout(guides = "collect")

# Maaslin results for the carrier vs healthy comparison.
# NOTE(review): these are the raw Group labels, not the relabelled
# 'High Vi-titre' / 'Household contact' values; presumably they have to match
# how the maaslin output was named on disk — confirm against read_in_maaslin.
# groups_to_analyse <- c('Acute_typhi', 'Control_HealthySerosurvey')
groups_to_analyse <- c("Carrier", "Control_HealthySerosurvey")

# Model variables per country; Malawi additionally includes sequencing_lane.
# bang_variables_for_analysis <- c("Group", "Sex", "Age")
mwi_variables_for_analysis <- c("Group", "Sex", "Age", "sequencing_lane")
nep_variables_for_analysis <- c("Group", "Sex", "Age")

# Read the per-country maaslin output.
# bangladesh_taxonomic_maaslin <- read_in_maaslin('Bangladesh', groups_to_analyse, bang_variables_for_analysis, 'metaphlan')
malawi_taxonomic_maaslin <- read_in_maaslin(
  "Malawi",
  groups_to_analyse,
  mwi_variables_for_analysis,
  "metaphlan"
)
nepal_taxonomic_maaslin <- read_in_maaslin(
  "Nepal",
  groups_to_analyse,
  nep_variables_for_analysis,
  "metaphlan"
)

# Filter each country's results.
# bangladesh_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(bangladesh_taxonomic_maaslin)
malawi_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(malawi_taxonomic_maaslin)
nepal_taxonomic_maaslin_filtered <- filter_taxonomic_maaslin(nepal_taxonomic_maaslin)

# Summary statistics for each country's filtered results.
# bangladesh_maaslin_stats <- basic_maaslin_stats(bangladesh_taxonomic_maaslin_filtered, 'Bangladesh', bang_variables_for_analysis, groups_to_analyse)
malawi_maaslin_stats <- basic_maaslin_stats(
  malawi_taxonomic_maaslin_filtered,
  "Malawi",
  mwi_variables_for_analysis,
  groups_to_analyse
)
nepal_maaslin_stats <- basic_maaslin_stats(
  nepal_taxonomic_maaslin_filtered,
  "Nepal",
  nep_variables_for_analysis,
  groups_to_analyse
)

# Number of rows in Malawi's `maaslin_results_sig` table.
nrow(malawi_maaslin_stats$maaslin_results_sig)
## [1] 125
# Number of rows in Nepal's `maaslin_results_sig` table.
nrow(nepal_maaslin_stats$maaslin_results_sig)
## [1] 1
We’re not including the Bangladesh samples in this analysis, because the Bangladesh carriers were processed differently (extracted without being frozen).
There are no species significantly associated with carrier status in both Malawi and Nepal, so we are not combining the maaslin results.